############################################################################################################################
########Inclusion, recognition, and inter-group comparisons: The effects of power-sharing institutions on grievances########
#####Group-level mediation analyses and additional group-level models (appendix 3)#####
#####Andreas Juon, 2023 (Journal of Conflict Research)#####
############################################################################################################################

####################################################################################
#########STEP 1: LIBRARIES, DATA IMPORT, DEFINITIONS#########
####################################################################################

setwd("XXXXX")#set to directory that contains the data files

#######1.1 Libraries#######
library(mediation)
library(stargazer)
library(ggplot2)
library(grid)
library(gridExtra)
library(dotwhisker)
library(modeest)
library(boot)
library(tidyr)
library(broom)
library(plyr)
library(dplyr)

#######1.2 Reading Data#######
grievances_group_unsat <- read.csv(file="./juon_jcr_grievances_data_group_unsat.csv")
grievances_group_disc <- read.csv(file="./juon_jcr_grievances_data_group_disc.csv")
group_data_1992_2018 <- read.csv("./juon_jcr_grievances_data_group_1992_2018.csv")
group_data_1992_2018 <- subset(group_data_1992_2018, other == 0)

######1.3 Formula for clustering SE's
get_confint<-function(model, vcovCL){
  t<-qt(.975, model$df.residual)
  ct<-coeftest(model, vcovCL)
  est<-cbind(ct[,1], ct[,1]-t*ct[,2], ct[,1]+t*ct[,2])
  colnames(est)<-c("Estimate","LowerCI","UpperCI")
  return(est)
}
cluster.se<-function(model, cluster)
{
  require(multiwayvcov)
  require(lmtest)
  vcovCL<-cluster.vcov(model, cluster)
  coef<-coeftest(model, vcovCL)
  return(coef)
}

######1.4 Formula for extracting mediation summary
extract_mediation_summary <- function (x) { 
  
  clp <- 100 * x$conf.level
  isLinear.y <- ((class(x$model.y)[1] %in% c("lm", "rq")) || 
                   (inherits(x$model.y, "glm") && x$model.y$family$family == 
                      "gaussian" && x$model.y$family$link == "identity") || 
                   (inherits(x$model.y, "survreg") && x$model.y$dist == 
                      "gaussian"))
  
  printone <- !x$INT && isLinear.y
  
  if (printone) {
    
    smat <- c(x$d1, x$d1.ci, x$d1.p)
    smat <- rbind(smat, c(x$z0, x$z0.ci, x$z0.p))
    smat <- rbind(smat, c(x$tau.coef, x$tau.ci, x$tau.p))
    smat <- rbind(smat, c(x$n0, x$n0.ci, x$n0.p))
    
    rownames(smat) <- c("ACME", "ADE", "Total Effect", "Prop. Mediated")
    
  } else {
    smat <- c(x$d0, x$d0.ci, x$d0.p)
    smat <- rbind(smat, c(x$d1, x$d1.ci, x$d1.p))
    smat <- rbind(smat, c(x$z0, x$z0.ci, x$z0.p))
    smat <- rbind(smat, c(x$z1, x$z1.ci, x$z1.p))
    smat <- rbind(smat, c(x$tau.coef, x$tau.ci, x$tau.p))
    smat <- rbind(smat, c(x$n0, x$n0.ci, x$n0.p))
    smat <- rbind(smat, c(x$n1, x$n1.ci, x$n1.p))
    smat <- rbind(smat, c(x$d.avg, x$d.avg.ci, x$d.avg.p))
    smat <- rbind(smat, c(x$z.avg, x$z.avg.ci, x$z.avg.p))
    smat <- rbind(smat, c(x$n.avg, x$n.avg.ci, x$n.avg.p))
    
    rownames(smat) <- c("ACME (control)", "ACME (treated)", 
                        "ADE (control)", "ADE (treated)", "Total Effect", 
                        "Prop. Mediated (control)", "Prop. Mediated (treated)", 
                        "ACME (average)", "ADE (average)", "Prop. Mediated (average)")
    
  }
  
  colnames(smat) <- c("estimate", "lower", 
                      "upper", "p")
  smat
  
}

######1.5 Formula for combining ggplot legends
grid_arrange_shared_legend <- function(..., ncol = length(list(...)), nrow = 1, position = c("bottom", "right")) {
  plots <- list(...)
  position <- match.arg(position)
  g <- ggplotGrob(plots[[1]] + 
                    theme(legend.position = position))$grobs
  legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
  lheight <- sum(legend$height)
  lwidth <- sum(legend$width)
  gl <- lapply(plots, function(x) x +
                 theme(legend.position = "none"))
  gl <- c(gl, ncol = ncol, nrow = nrow)
  
  combined <- switch(position,
                     "bottom" = arrangeGrob(do.call(arrangeGrob, gl), 
                                            legend,ncol = 1,
                                            heights = unit.c(unit(1, "npc") - lheight, lheight)),
                     "right" = arrangeGrob(do.call(arrangeGrob, gl),
                                           legend, ncol = 2,
                                           widths = unit.c(unit(1, "npc") - lwidth, lwidth)))
  
  grid.newpage()
  grid.draw(combined)
  
  invisible(combined)
}


####################################################################################
#########STEP 2: Group-level survey models (Appendix 3.1.2)#########
####################################################################################

######a) unsat_gov (table A4)
m2_unsat_gov_group_med <- lm(as.formula(paste("included_nc", paste(c("survey","state_control","state_control * ps_corp","state_control * ps_lib","size","other","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region","age_mean","gender_mean","educ_high_mean", "interest_pol_d_mean"), collapse = " + "), sep = " ~ ")), weights = group_s_number, data = subset(grievances_group_unsat, group_s_number >= 50))
cl_m2_unsat_gov_group_med <- data.frame(cluster.se(m2_unsat_gov_group_med, as.integer(subset(grievances_group_unsat, group_s_number >= 50)$cowcode))[, 2])
m2_unsat_gov_group_med2 <- lm(as.formula(paste("discriminated", paste(c("survey","state_control","state_control * ps_corp","state_control * ps_lib","size","other","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region","age_mean","gender_mean","educ_high_mean", "interest_pol_d_mean"), collapse = " + "), sep = " ~ ")), weights = group_s_number, data = subset(grievances_group_unsat, group_s_number >= 50))
cl_m2_unsat_gov_group_med2 <- data.frame(cluster.se(m2_unsat_gov_group_med2, as.integer(subset(grievances_group_unsat, group_s_number >= 50)$cowcode))[, 2])
m2_unsat_gov_group_med3 <- lm(as.formula(paste("status_period_years_nc", paste(c("survey","state_control","state_control * ps_corp","state_control * ps_lib","size","other","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region","age_mean","gender_mean","educ_high_mean", "interest_pol_d_mean"), collapse = " + "), sep = " ~ ")), weights = group_s_number, data = subset(grievances_group_unsat, group_s_number >= 50))
cl_m2_unsat_gov_group_med3 <- data.frame(cluster.se(m2_unsat_gov_group_med3, as.integer(subset(grievances_group_unsat, group_s_number >= 50)$cowcode))[, 2])
m2_unsat_gov_group <- lm(as.formula(paste("unsat_gov_mean", paste(c("survey","state_control","state_control * ps_corp","state_control * ps_lib","included_nc","status_period_years_nc","discriminated","size","other","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region","age_mean","gender_mean","educ_high_mean", "interest_pol_d_mean"), collapse = " + "), sep = " ~ ")), weights = group_s_number, data = subset(grievances_group_unsat, group_s_number >= 50))
cl_m2_unsat_gov_group <- data.frame(cluster.se(m2_unsat_gov_group, as.integer(subset(grievances_group_unsat, group_s_number >= 50)$cowcode))[, 2])
stargazer(m2_unsat_gov_group_med, m2_unsat_gov_group_med3, m2_unsat_gov_group_med2, m2_unsat_gov_group, se = c(cl_m2_unsat_gov_group_med, cl_m2_unsat_gov_group_med3, cl_m2_unsat_gov_group_med2, cl_m2_unsat_gov_group), dep.var.labels = c("Included NC", "Status persistence", "Discriminated","% feeling discriminated"), type="text", style = "ajps", omit = c("survey","region"), notes = c("* p<0.1; ** p<0.05; *** p<0.01. Survey- and region-fixed effects included but not reported. Country-clustered standard errors in parentheses."), notes.append=FALSE, covariate.labels = c("Core","Corporate PSI (group)","Liberal PSI (group)","Included non-core","Status persistence","Discriminated","Relative size","non-EPR group","Recent contestation","CPI","Polity (normalized)","GDP p.c. (logged)","Ethnic fractionalization","Age mean","Female mean","High education mean","Political interest mean","Core x corporate PSI (group)","Core x liberal PSI (group)"))            
######b) disc_grp (table A5)
m2_disc_grp_group_med <- lm(as.formula(paste("included_nc", paste(c("survey","state_control","state_control * ps_corp","state_control * ps_lib","size","other","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region_CEE","region_MENA","region_SSA","age_mean","gender_mean","educ_high_mean", "interest_pol_d_mean"), collapse = " + "), sep = " ~ ")), weights = group_s_number, data = subset(grievances_group_disc, group_s_number >= 50))
cl_m2_disc_grp_group_med <- data.frame(cluster.se(m2_disc_grp_group_med, as.integer(subset(grievances_group_disc, group_s_number >= 50)$cowcode))[, 2])
m2_disc_grp_group_med2 <- lm(as.formula(paste("discriminated", paste(c("survey","state_control","state_control * ps_corp","state_control * ps_lib","size","other","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region_CEE","region_MENA","region_SSA","age_mean","gender_mean","educ_high_mean", "interest_pol_d_mean"), collapse = " + "), sep = " ~ ")), weights = group_s_number, data = subset(grievances_group_disc, group_s_number >= 50))
cl_m2_disc_grp_group_med2 <- data.frame(cluster.se(m2_disc_grp_group_med2, as.integer(subset(grievances_group_disc, group_s_number >= 50)$cowcode))[, 2])
m2_disc_grp_group_med3 <- lm(as.formula(paste("status_period_years_nc", paste(c("survey","state_control","state_control * ps_corp","state_control * ps_lib","size","other","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region_CEE","region_MENA","region_SSA","age_mean","gender_mean","educ_high_mean", "interest_pol_d_mean"), collapse = " + "), sep = " ~ ")), weights = group_s_number, data = subset(grievances_group_disc, group_s_number >= 50))
cl_m2_disc_grp_group_med3 <- data.frame(cluster.se(m2_disc_grp_group_med3, as.integer(subset(grievances_group_disc, group_s_number >= 50)$cowcode))[, 2])
m2_disc_grp_group <- lm(as.formula(paste("disc_grp_mean", paste(c("survey","state_control","state_control * ps_corp","state_control * ps_lib","included_nc","status_period_years_nc","discriminated","size","other","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region_CEE","region_MENA","region_SSA","age_mean","gender_mean","educ_high_mean", "interest_pol_d_mean"), collapse = " + "), sep = " ~ ")), weights = group_s_number, data = subset(grievances_group_disc, group_s_number >= 50))
cl_m2_disc_grp_group <- data.frame(cluster.se(m2_disc_grp_group, as.integer(subset(grievances_group_disc, group_s_number >= 50)$cowcode))[, 2])
stargazer(m2_disc_grp_group_med, m2_disc_grp_group_med3, m2_disc_grp_group_med2, m2_disc_grp_group, se = c(cl_m2_disc_grp_group_med, cl_m2_disc_grp_group_med3, cl_m2_disc_grp_group_med2, cl_m2_disc_grp_group), dep.var.labels = c("Included NC", "Status persistence", "Discriminated","% feeling discriminated"), type="text", style = "ajps", omit = c("survey","region"), notes = c("* p<0.1; ** p<0.05; *** p<0.01. Survey- and region-fixed effects included but not reported. Country-clustered standard errors in parentheses."), notes.append=FALSE, covariate.labels = c("Core","Corporate PSI (group)","Liberal PSI (group)","Included non-core","Status persistence","Discriminated","Relative size","non-EPR group","Recent contestation","CPI","Polity (normalized)","GDP p.c. (logged)","Ethnic fractionalization","Age mean","Female mean","High education mean","Political interest mean","Core x corporate PSI (group)","Core x liberal PSI (group)"))


####################################################################################
#########STEP 3: Formal mediation analyses (appendix 3.1.3)#########
####################################################################################

######3.1 unsat_gov######

###a) mediator 1: included_nc
unsat_gov_med.out <- mediate(m2_unsat_gov_group_med, m2_unsat_gov_group, treat = "ps_corp", mediator = "included_nc", control.value = 0, treat.value = 1, sims = 100, conf.level = 0.9, cluster = subset(grievances_group_unsat, group_s_number >= 50)$cowcode)
unsat_gov_med.out_df <- data.frame(extract_mediation_summary(summary(unsat_gov_med.out)))
unsat_gov_med.out_df$model <- rownames(unsat_gov_med.out_df)
unsat_gov_med.out_df$conf.low <- unsat_gov_med.out_df$lower
unsat_gov_med.out_df$conf.high <- unsat_gov_med.out_df$upper
unsat_gov_med.out_df$term <- "Corporate PSI (group)"
unsat_gov_med.out_df <- subset(unsat_gov_med.out_df, model != "Prop. Mediated")
unsat_gov_med.out_plot <- dwplot(unsat_gov_med.out_df, vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), dot_args = list(aes(shape = model), colour = "black"), whisker_args = list(aes(linetype = model), colour = "black")) +
  theme_bw() + theme(plot.title = element_text(size=10), axis.title=element_text(size=8), axis.ticks.y = element_blank(), axis.text.y = element_blank()) + xlab("Effect estimate") + ylab("") +
  ggtitle("a) included non-core") + scale_shape_manual(name = "type", labels = c("total","ADE","ACME"), values = c(20,18,1)) + scale_linetype_manual(name = "type", labels = c("total","ADE","ACME"), values = c(1,2,3)) + theme(legend.position = "bottom", text=element_text(family="Times New Roman")) 

###b) mediator 2: discriminated
unsat_gov_med.out2 <- mediate(m2_unsat_gov_group_med2, m2_unsat_gov_group, treat = "ps_corp", mediator = "discriminated", control.value = 0, treat.value = 1, sims = 100, conf.level = 0.9, cluster = subset(grievances_group_unsat, group_s_number >= 50)$cowcode)
unsat_gov_med.out_df2 <- data.frame(extract_mediation_summary(summary(unsat_gov_med.out2)))
unsat_gov_med.out_df2$model <- rownames(unsat_gov_med.out_df2)
unsat_gov_med.out_df2$conf.low <- unsat_gov_med.out_df2$lower
unsat_gov_med.out_df2$conf.high <- unsat_gov_med.out_df2$upper
unsat_gov_med.out_df2$term <- "Corporate PSI (group)"
unsat_gov_med.out_df2 <- subset(unsat_gov_med.out_df2, model != "Prop. Mediated")
unsat_gov_med.out_plot2 <- dwplot(unsat_gov_med.out_df2, vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), dot_args = list(aes(shape = model), colour = "black"), whisker_args = list(aes(linetype = model), colour = "black")) +
  theme_bw() + theme(plot.title = element_text(size=10), axis.title=element_text(size=8), axis.ticks.y = element_blank(), axis.text.y = element_blank()) + xlab("Effect estimate") + ylab("") +
  ggtitle("c) discriminated") + scale_shape_manual(name = "type", labels = c("total","ADE","ACME"), values = c(20,18,1)) + scale_linetype_manual(name = "type", labels = c("total","ADE","ACME"), values = c(1,2,3)) + theme(legend.position = "bottom", text=element_text(family="Times New Roman")) 
unsat_gov_med.out_plot2

###c) mediator 3: status_period_years_nc
unsat_gov_med.out3 <- mediate(m2_unsat_gov_group_med3, m2_unsat_gov_group, treat = "ps_corp", mediator = "status_period_years_nc", control.value = 0, treat.value = 1, sims = 100, conf.level = 0.9, cluster = subset(grievances_group_unsat, group_s_number >= 50)$cowcode)
unsat_gov_med.out_df3 <- data.frame(extract_mediation_summary(summary(unsat_gov_med.out3)))
unsat_gov_med.out_df3$model <- rownames(unsat_gov_med.out_df3)
unsat_gov_med.out_df3$conf.low <- unsat_gov_med.out_df3$lower
unsat_gov_med.out_df3$conf.high <- unsat_gov_med.out_df3$upper
unsat_gov_med.out_df3$term <- "Corporate PSI (group)"
unsat_gov_med.out_df3 <- subset(unsat_gov_med.out_df3, model != "Prop. Mediated")
unsat_gov_med.out_plot3 <- dwplot(unsat_gov_med.out_df3, vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), dot_args = list(aes(shape = model), colour = "black"), whisker_args = list(aes(linetype = model), colour = "black")) +
  theme_bw() + theme(plot.title = element_text(size=10), axis.title=element_text(size=8), axis.ticks.y = element_blank(), axis.text.y = element_blank()) + xlab("Effect estimate") + ylab("") +
  ggtitle("b) status persistence") + scale_shape_manual(name = "type", labels = c("total","ADE","ACME"), values = c(20,18,1)) + scale_linetype_manual(name = "type", labels = c("total","ADE","ACME"), values = c(1,2,3)) + theme(legend.position = "bottom", text=element_text(family="Times New Roman")) 

###d) combine figure A3
figure_a3 <- grid_arrange_shared_legend(unsat_gov_med.out_plot + coord_cartesian(xlim = c(-0.18, 0.01)), unsat_gov_med.out_plot3 + coord_cartesian(xlim = c(-0.18, 0.01)), unsat_gov_med.out_plot2 + coord_cartesian(xlim = c(-0.18, 0.01)), nrow=3, ncol=1)


######3.2 disc_grp

###a) mediator 1: included_nc
disc_grp_med.out <- mediate(m2_disc_grp_group_med, m2_disc_grp_group, treat = "ps_corp", mediator = "included_nc", control.value = 0, treat.value = 1, sims = 100, conf.level = 0.9, cluster = subset(grievances_group_disc, group_s_number >= 50)$cowcode)
disc_grp_med.out_df <- data.frame(extract_mediation_summary(summary(disc_grp_med.out)))
disc_grp_med.out_df$model <- rownames(disc_grp_med.out_df)
disc_grp_med.out_df$conf.low <- disc_grp_med.out_df$lower
disc_grp_med.out_df$conf.high <- disc_grp_med.out_df$upper
disc_grp_med.out_df$term <- "Corporate PSI (group)"
disc_grp_med.out_df <- subset(disc_grp_med.out_df, model != "Prop. Mediated")
disc_grp_med.out_plot1 <- dwplot(disc_grp_med.out_df, vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), dot_args = list(aes(shape = model), colour = "black"), whisker_args = list(aes(linetype = model), colour = "black")) +
  theme_bw() + theme(plot.title = element_text(size=10), axis.title=element_text(size=8), axis.ticks.y = element_blank(), axis.text.y = element_blank()) + xlab("Effect estimate") + ylab("") +
  ggtitle("a) included non-core") + scale_shape_manual(name = "type", labels = c("total","ADE","ACME"), values = c(20,18,1)) + scale_linetype_manual(name = "type", labels = c("total","ADE","ACME"), values = c(1,2,3)) + theme(legend.position = "bottom", text=element_text(family="Times New Roman")) 

###b) mediator 2: discriminated
disc_grp_med.out2 <- mediate(m2_disc_grp_group_med2, m2_disc_grp_group, treat = "ps_corp", mediator = "discriminated", control.value = 0, treat.value = 1, sims = 100, conf.level = 0.9, cluster = subset(grievances_group_disc, group_s_number >= 50)$cowcode)
disc_grp_med.out2_df <- data.frame(extract_mediation_summary(summary(disc_grp_med.out2)))
disc_grp_med.out2_df$model <- rownames(disc_grp_med.out2_df)
disc_grp_med.out2_df$conf.low <- disc_grp_med.out2_df$lower
disc_grp_med.out2_df$conf.high <- disc_grp_med.out2_df$upper
disc_grp_med.out2_df$term <- "Corporate PSI (group)"
disc_grp_med.out2_df <- subset(disc_grp_med.out2_df, model != "Prop. Mediated")
disc_grp_med.out_plot2 <- dwplot(disc_grp_med.out2_df, vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), dot_args = list(aes(shape = model), colour = "black"), whisker_args = list(aes(linetype = model), colour = "black")) +
  theme_bw() + theme(plot.title = element_text(size=10), axis.title=element_text(size=8), axis.ticks.y = element_blank(), axis.text.y = element_blank()) + xlab("Effect estimate") + ylab("") +
  ggtitle("c) discriminated") + scale_shape_manual(name = "type", labels = c("total","ADE","ACME"), values = c(20,18,1)) + scale_linetype_manual(name = "type", labels = c("total","ADE","ACME"), values = c(1,2,3)) + theme(legend.position = "bottom", text=element_text(family="Times New Roman")) 

###c) mediator 3: status_period_years_nc
disc_grp_med.out3 <- mediate(m2_disc_grp_group_med3, m2_disc_grp_group, treat = "ps_corp", mediator = "status_period_years_nc", control.value = 0, treat.value = 1, sims = 100, conf.level = 0.9, cluster = subset(grievances_group_disc, group_s_number >= 50)$cowcode)
disc_grp_med.out3_df <- data.frame(extract_mediation_summary(summary(disc_grp_med.out3)))
disc_grp_med.out3_df$model <- rownames(disc_grp_med.out3_df)
disc_grp_med.out3_df$conf.low <- disc_grp_med.out3_df$lower
disc_grp_med.out3_df$conf.high <- disc_grp_med.out3_df$upper
disc_grp_med.out3_df$term <- "Corporate PSI (group)"
disc_grp_med.out3_df <- subset(disc_grp_med.out3_df, model != "Prop. Mediated")
disc_grp_med.out_plot3 <- dwplot(disc_grp_med.out3_df, vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), dot_args = list(aes(shape = model), colour = "black"), whisker_args = list(aes(linetype = model), colour = "black")) +
  theme_bw() + theme(plot.title = element_text(size=10), axis.title=element_text(size=8), axis.ticks.y = element_blank(), axis.text.y = element_blank()) + xlab("Effect estimate") + ylab("") +
  ggtitle("b) status persistence") + scale_shape_manual(name = "type", labels = c("total","ADE","ACME"), values = c(20,18,1)) + scale_linetype_manual(name = "type", labels = c("total","ADE","ACME"), values = c(1,2,3)) + theme(legend.position = "bottom", text=element_text(family="Times New Roman")) 

###d) combine figure A4
figure_a4 <- grid_arrange_shared_legend(disc_grp_med.out_plot1 + coord_cartesian(xlim = c(-0.13, 0.02)), disc_grp_med.out_plot3 + coord_cartesian(xlim = c(-0.13, 0.02)), disc_grp_med.out_plot2 + coord_cartesian(xlim = c(-0.13, 0.02)), nrow=3, ncol=1)



####################################################################################
#########STEP 4: Additional group-level models (appendix 3.2)#########
####################################################################################

######4.1 Additional group-level models (table A6)######
m_included <- glm(as.formula(paste("included_nc", paste(c("ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region"), collapse = " + "), sep = " ~ ")), family = binomial, data = subset(group_data_1992_2018, state_control == 0))
cl_m_included <- data.frame(cluster.se(m_included, as.integer(subset(group_data_1992_2018, state_control == 0)$cowcode))[, 2])
m_discriminated <- glm(as.formula(paste("discriminated", paste(c("ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region"), collapse = " + "), sep = " ~ ")), family = binomial, data = subset(group_data_1992_2018, state_control == 0))
cl_m_discriminated <- data.frame(cluster.se(m_discriminated, as.integer(subset(group_data_1992_2018, state_control == 0)$cowcode))[, 2])
m_downgraded <- glm(as.formula(paste("downgraded_inc_p1", paste(c("ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region"), collapse = " + "), sep = " ~ ")), family = binomial, data = subset(group_data_1992_2018, state_control == 0 & included == 1))
cl_m_downgraded <- data.frame(cluster.se(m_downgraded, as.integer(subset(group_data_1992_2018, state_control == 0 & included == 1)$cowcode))[, 2])
stargazer(m_included, m_downgraded, m_discriminated, se = c(cl_m_included, cl_m_downgraded, cl_m_discriminated), dep.var.labels = c("Included NC","Status downgraded","Discriminated"), type="text", style = "ajps", omit = c("survey","region"), notes = c("* p<0.1; ** p<0.05; *** p<0.01. Survey- and region-fixed effects included but not reported."), notes.append=FALSE, covariate.labels = c("Corporate PSI (group)","Liberal PSI (group)","Relative size","Recent contestation","CPI","Polity (normalized)","GDP p.c. (logged)","Ethnic fractionalization"))


######4.2 Predicted probability plots######

###a) mean/median/modal values, all/included groups, used for predicted probabilities
categorical_vars <- c("state_control","included_nc","ongoing_recent_contestation")
nominal_vars <- c("region","cowcode")
numeric_vars <- c("age","ps_corp","ps_lib","size","cpi","politya","lgdppc","fractionalization1")
#all groups
group_all_means <- subset(group_data_1992_2018, state_control == 0)
group_all_means <- group_all_means[c("cowcode","state_control","included_nc","ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region")]
group_all_means <- group_all_means[complete.cases(group_all_means),]
for (i in (categorical_vars)) {
  step1 <- paste("group_all_means$",i,"<- median(group_all_means$",i,")",sep="")
  eval(parse(text=c(step1)))
}
for (i in (nominal_vars)) {
  step1 <- paste("group_all_means$",i,"<- mlv(group_all_means$",i,")",sep="")
  eval(parse(text=c(step1)))
}
for (i in (numeric_vars)) {
  step1 <- paste("group_all_means$",i,"<- median(group_all_means$",i,")",sep="")
  eval(parse(text=c(step1)))
}
group_all_means <- unique(group_all_means)
group_all_means$merge <- 1
#included groups only
group_inc_means <- subset(group_data_1992_2018, state_control == 0 & included == 1)
group_inc_means <- group_inc_means[c("cowcode","state_control","included_nc","ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region")]
group_inc_means <- group_inc_means[complete.cases(group_inc_means),]
for (i in (categorical_vars)) {
  step1 <- paste("group_inc_means$",i,"<- median(group_inc_means$",i,")",sep="")
  eval(parse(text=c(step1)))
}
for (i in (nominal_vars)) {
  step1 <- paste("group_inc_means$",i,"<- mlv(group_inc_means$",i,")",sep="")
  eval(parse(text=c(step1)))
}
for (i in (numeric_vars)) {
  step1 <- paste("group_inc_means$",i,"<- median(group_inc_means$",i,")",sep="")
  eval(parse(text=c(step1)))
}
group_inc_means <- unique(group_inc_means)
group_inc_means$merge <- 1

###b) prediction dataframes
#corporate PSI, all groups
mgroup_all_pred_df <- left_join(data.frame(merge = 1, type = "corporate PSI", psi = seq(0, 1, length.out = 50), ps_corp = seq(0, 1, length.out = 50)), subset(group_all_means, select=-c(ps_corp)), by="merge")
mgroup_all_pred_df <- unique(mgroup_all_pred_df)
#corporate PSI, included groups
mgroup_inc_pred_df <- left_join(data.frame(merge = 1, type = "corporate PSI", psi = seq(0, 1, length.out = 50), ps_corp = seq(0, 1, length.out = 50)), subset(group_inc_means, select=-c(ps_corp)), by="merge")
mgroup_inc_pred_df <- unique(mgroup_inc_pred_df)
#liberal PSI, all groups
mgroup_all_pred_df_lib <- left_join(data.frame(merge = 1, type = "liberal PSI", psi = seq(0, 1, length.out = 50), ps_lib = seq(0, 1, length.out = 50)), subset(group_all_means, select=-c(ps_lib)), by="merge")
mgroup_all_pred_df_lib <- unique(mgroup_all_pred_df_lib)
#liberal PSI, included groups
mgroup_inc_pred_df_lib <- left_join(data.frame(merge = 1, type = "liberal PSI", psi = seq(0, 1, length.out = 50), ps_lib = seq(0, 1, length.out = 50)), subset(group_inc_means, select=-c(ps_lib)), by="merge")
mgroup_inc_pred_df_lib <- unique(mgroup_inc_pred_df_lib)

###c) predictions for corporate PSI
#included_nc
m_included_pred_fun <- function(dat, inds)
{
  dat =  subset(group_data_1992_2018, state_control == 0)[inds,]
  m_included_temp <- glm(as.formula(paste("included_nc", paste(c("ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region"), collapse = " + "), sep = " ~ ")), family = binomial, data = dat)
  pred.b <- predict(m_included_temp, newdata = mgroup_all_pred_df, type ="response", re.form = NA)
}
m_included_pred <- boot(data = subset(group_data_1992_2018, state_control == 0), statistic = m_included_pred_fun, 100)
m_included_pred_tidy <- tidy(m_included_pred)
m_included_pred_df2 <- cbind(mgroup_all_pred_df, m_included_pred_tidy)
#downgraded
m_downgraded_pred_fun <- function(dat, inds)
{
  dat =  subset(group_data_1992_2018, state_control == 0 & included == 1)[inds,]
  m_downgraded_temp <- glm(as.formula(paste("downgraded_inc_p1", paste(c("ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region"), collapse = " + "), sep = " ~ ")), family = binomial, data = dat)
  pred.b <- predict(m_downgraded_temp, newdata = mgroup_inc_pred_df, type ="response", re.form = NA)
}
m_downgraded_pred <- boot(data = subset(group_data_1992_2018, state_control == 0 & included == 1), statistic = m_downgraded_pred_fun, 100)
m_downgraded_pred_tidy <- tidy(m_downgraded_pred)
m_downgraded_pred_df2 <- cbind(mgroup_all_pred_df, m_downgraded_pred_tidy)
#discriminated
m_discriminated_pred_fun <- function(dat, inds)
{
  dat =  subset(group_data_1992_2018, state_control == 0)[inds,]
  m_discriminated_temp <- glm(as.formula(paste("discriminated", paste(c("ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region"), collapse = " + "), sep = " ~ ")), family = binomial, data = dat)
  pred.b <- predict(m_discriminated_temp, newdata = mgroup_all_pred_df, type ="response", re.form = NA)
}
m_discriminated_pred <- boot(data = subset(group_data_1992_2018, state_control == 0), statistic = m_discriminated_pred_fun, 100)
m_discriminated_pred_tidy <- tidy(m_discriminated_pred)
m_discriminated_pred_df2 <- cbind(mgroup_all_pred_df, m_discriminated_pred_tidy)

###d) predictions for liberal PSI
#included_nc
m_included_pred_lib_fun <- function(dat, inds)
{
  dat =  subset(group_data_1992_2018, state_control == 0)[inds,]
  m_included_temp <- glm(as.formula(paste("included_nc", paste(c("ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region"), collapse = " + "), sep = " ~ ")), family = binomial, data = dat)
  pred.b <- predict(m_included_temp, newdata = mgroup_all_pred_df_lib, type ="response", re.form = NA)
}
m_included_pred_lib <- boot(data = subset(group_data_1992_2018, state_control == 0), statistic = m_included_pred_lib_fun, 100)
m_included_pred_lib_tidy <- tidy(m_included_pred_lib)
m_included_pred_lib_df2 <- cbind(mgroup_all_pred_df_lib, m_included_pred_lib_tidy)
#downgraded
m_downgraded_pred_lib_fun <- function(dat, inds)
{
  dat =  subset(group_data_1992_2018, state_control == 0 & included == 1)[inds,]
  m_downgraded_temp <- glm(as.formula(paste("downgraded_inc_p1", paste(c("ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region"), collapse = " + "), sep = " ~ ")), family = binomial, data = dat)
  pred.b <- predict(m_downgraded_temp, newdata = mgroup_inc_pred_df_lib, type ="response", re.form = NA)
}
m_downgraded_pred_lib <- boot(data = subset(group_data_1992_2018, state_control == 0 & included == 1), statistic = m_downgraded_pred_lib_fun, 100)
m_downgraded_pred_lib_tidy <- tidy(m_downgraded_pred_lib)
m_downgraded_pred_lib_df2 <- cbind(mgroup_all_pred_df_lib, m_downgraded_pred_lib_tidy)
#discriminated
m_discriminated_pred_lib_fun <- function(dat, inds)
{
  dat =  subset(group_data_1992_2018, state_control == 0)[inds,]
  m_discriminated_temp <- glm(as.formula(paste("discriminated", paste(c("ps_corp","ps_lib","size","ongoing_recent_contestation","cpi","politya","lgdppc","fractionalization1","region"), collapse = " + "), sep = " ~ ")), family = binomial, data = dat)
  pred.b <- predict(m_discriminated_temp, newdata = mgroup_all_pred_df_lib, type ="response", re.form = NA)
}
m_discriminated_pred_lib <- boot(data = subset(group_data_1992_2018, state_control == 0), statistic = m_discriminated_pred_lib_fun, 100)
m_discriminated_pred_lib_tidy <- tidy(m_discriminated_pred_lib)
m_discriminated_pred_lib_df2 <- cbind(mgroup_all_pred_df_lib, m_discriminated_pred_lib_tidy)

###e) plots (figure A5)
#included_nc
m_included_plot <- rbind.fill(m_included_pred_df2, m_included_pred_lib_df2)
m_included_plot2 <- ggplot(data = m_included_plot) + geom_line(aes(y=statistic, x=psi, group = factor(type), colour = factor(type), linetype = factor(type))) + 
  geom_ribbon(alpha = 0.25, aes(x=psi, ymin = statistic - 1.645 * std.error, ymax = statistic + 1.645 * std.error, group = factor(type), fill = factor(type))) +
  theme_bw() + theme(text=element_text(family='Times New Roman'), legend.position="bottom", plot.title = element_text(size=10), axis.title = element_text(size=10), axis.text = element_text(size=10)) + scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
  xlab('PSI (group)') + ylab('predicted probability of\ngovernment inclusion') +
  scale_colour_manual(values = c("#e41a1c","#377eb8"), name="type") +
  scale_fill_manual(values = c("#e41a1c","#377eb8"), name="type")+
  scale_linetype_manual(values = c(1,2), name="type") +
  ggtitle("a) government inclusion")
#downgraded
m_downgraded_plot <- rbind.fill(m_downgraded_pred_df2, m_downgraded_pred_lib_df2)
m_downgraded_plot2 <- ggplot(data = m_downgraded_plot) + geom_line(aes(y=statistic, x=psi, group = factor(type), colour = factor(type), linetype = factor(type))) + 
  geom_ribbon(alpha = 0.25, aes(x=psi, ymin = statistic - 1.645 * std.error, ymax = statistic + 1.645 * std.error, group = factor(type), fill = factor(type))) +
  theme_bw() + theme(text=element_text(family='Times New Roman'), legend.position="bottom", plot.title = element_text(size=10), axis.title = element_text(size=10), axis.text = element_text(size=10)) + scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
  xlab('PSI (group)') + ylab('predicted probability of\ndowngrading') +
  scale_colour_manual(values = c("#e41a1c","#377eb8"), name="type") +
  scale_fill_manual(values = c("#e41a1c","#377eb8"), name="type")+
  scale_linetype_manual(values = c(1,2), name="type") +
  ggtitle("b) downgrading")
#discriminated
m_discriminated_plot <- rbind.fill(m_discriminated_pred_df2, m_discriminated_pred_lib_df2)
m_discriminated_plot2 <- ggplot(data = m_discriminated_plot) + geom_line(aes(y=statistic, x=psi, group = factor(type), colour = factor(type), linetype = factor(type))) + 
  geom_ribbon(alpha = 0.25, aes(x=psi, ymin = statistic - 1.645 * std.error, ymax = statistic + 1.645 * std.error, group = factor(type), fill = factor(type))) +
  theme_bw() + theme(text=element_text(family='Times New Roman'), legend.position="bottom", plot.title = element_text(size=10), axis.title = element_text(size=10), axis.text = element_text(size=10)) + scale_y_continuous(labels = scales::percent_format(accuracy = 0.01)) +
  xlab('PSI (group)') + ylab('predicted probability of\ndiscrimination') +
  scale_colour_manual(values = c("#e41a1c","#377eb8"), name="type") +
  scale_fill_manual(values = c("#e41a1c","#377eb8"), name="type")+
  scale_linetype_manual(values = c(1,2), name="type") +
  ggtitle("c) discrimination")
figure_a5 <- grid_arrange_shared_legend(m_included_plot2, m_downgraded_plot2, m_discriminated_plot2, nrow=1, ncol=3)
